library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(stringr)
library(ggpubr)
library(broom)
library(AICcmodavg)
library(knitr)
library(DT)
df<- read_csv("Data/FacultySalaries_1995.csv")%>% clean_names() %>%
pivot_longer(c(ends_with("salary")), names_to = "rank", values_to = "salary",names_prefix = "avg_") %>%
pivot_longer(c(ends_with("comp")), names_to = "comp_type", values_to = "comp_amt") %>%
pivot_longer(c(num_full_profs,num_assoc_profs,num_assist_profs), names_to = "faculty_type", values_to = "faculty_count")
## Rows: 1161 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): UnivName, State, Tier
## dbl (14): FedID, AvgFullProfSalary, AvgAssocProfSalary, AvgAssistProfSalary,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df <- df %>% filter(!df$tier == "VIIB")
datatable(df, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T))
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
#Graph
df %>%
ggplot(aes(x=rank, y=salary,fill=rank))+
geom_boxplot()+
facet_wrap(~tier)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 1152 rows containing non-finite values (`stat_boxplot()`).
twoway<-aov(salary~state+rank+tier, data = df)
summary(twoway)
## Df Sum Sq Mean Sq F value Pr(>F)
## state 50 51004114 1020082 284.7 <2e-16 ***
## rank 2 150993010 75496505 21071.8 <2e-16 ***
## tier 2 61397872 30698936 8568.3 <2e-16 ***
## Residuals 30113 107889768 3583
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1152 observations deleted due to missingness
##TASK 3
df2 <- read_csv("Data/Juniper_Oils.csv") %>% pivot_longer(c("alpha-pinene","para-cymene","alpha-terpineol","cedr-9-ene","alpha-cedrene","beta-cedrene","cis-thujopsene","alpha-himachalene","beta-chamigrene","cuparene","compound 1","alpha-chamigrene","widdrol","cedrol","beta-acorenol","alpha-acorenol","gamma-eudesmol","beta-eudesmol","alpha-eudesmol","cedr-8-en-13-ol","cedr-8-en-15-ol","compound 2","thujopsenal"),names_to = "chem_comp",values_to = "concentration") %>% clean_names()
## Rows: 34 Columns: 41
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SampleID, Project, Amplicon, Tree_Species, Field_Office, BLM_Fire_...
## dbl (34): BurnYear, Latitude, Longitude, alpha-pinene, para-cymene, alpha-te...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(df2,n=5)
## # A tibble: 5 × 20
## sample_id project ampli…¹ tree_…² burn_…³ latit…⁴ longi…⁵ field…⁶ blm_f…⁷
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 LOG-16S-SL12 JuniperL… 16S Junipe… 2018 41.6 -114. Salt_L… Ridge
## 2 LOG-16S-SL12 JuniperL… 16S Junipe… 2018 41.6 -114. Salt_L… Ridge
## 3 LOG-16S-SL12 JuniperL… 16S Junipe… 2018 41.6 -114. Salt_L… Ridge
## 4 LOG-16S-SL12 JuniperL… 16S Junipe… 2018 41.6 -114. Salt_L… Ridge
## 5 LOG-16S-SL12 JuniperL… 16S Junipe… 2018 41.6 -114. Salt_L… Ridge
## # … with 11 more variables: tracking_number <chr>, yield_percent <dbl>,
## # bolt_surface_area_cm2 <dbl>, raw_exit_holes_per_cm2 <dbl>,
## # raw_exit_holes <dbl>, living_larvae <dbl>, chem_total <dbl>,
## # chem_mean <dbl>, years_since_burn <dbl>, chem_comp <chr>,
## # concentration <dbl>, and abbreviated variable names ¹amplicon,
## # ²tree_species, ³burn_year, ⁴latitude, ⁵longitude, ⁶field_office,
## # ⁷blm_fire_name
##Task 4: Graph
df2 %>% ggplot(aes(x=years_since_burn,y=concentration)) + geom_smooth() + facet_wrap(~chem_comp,scales = "free_y")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
##5: generalized linear model
library(broom)
mod1 <- glm(data=df2,formula = concentration ~ years_since_burn+ chem_comp)
df3 <- tidy(mod1,conf.level = TRUE)
df4<- df3 %>% filter(df3$p.value<.05)
kable(x = df4,align = "c",caption ="Statistically significant compounds",col.names =c("compound", "estimate" ,"std.error","statistic", "p.value"))
| compound | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| chem_compalpha-cedrene | 10.623529 | 0.8993768 | 11.812101 | 0.0000000 |
| chem_compbeta-cedrene | 2.776471 | 0.8993768 | 3.087105 | 0.0020948 |
| chem_compbeta-eudesmol | 1.788235 | 0.8993768 | 1.988305 | 0.0471372 |
| chem_compcedr-8-en-13-ol | 5.552941 | 0.8993768 | 6.174210 | 0.0000000 |
| chem_compcedrol | 19.823529 | 0.8993768 | 22.041407 | 0.0000000 |
| chem_compcis-thujopsene | 21.298824 | 0.8993768 | 23.681759 | 0.0000000 |
| chem_compcompound 1 | 1.817647 | 0.8993768 | 2.021007 | 0.0436301 |
| chem_compcuparene | 1.923529 | 0.8993768 | 2.138736 | 0.0327760 |
| chem_compthujopsenal | 1.788235 | 0.8993768 | 1.988305 | 0.0471372 |
| chem_compwiddrol | 6.135294 | 0.8993768 | 6.821718 | 0.0000000 |